getwd()

##### CHANGE THIS TO YOUR WORKING DIRECTORY #####

setwd("C:/Users/carlos/Desktop/Carlos School/Harvard/Sophomore Year/Spring 2015/Gov 2001/qjps_12039_supp")

library(foreign)

##### Read in Stata File #####
data_ = read.dta("edited_data_.dta")
data = data_

##### Max out previous terms to 13 #####
data$terms[data$terms > 12] = 13
foo = vector(mode="list", length=5)

##### Replicate Regressions from initial paper #####
for (i in 1:5){
  if(i==1) {
    dv = data$VPCT_P_ALL_NC100 
  }
  if(i==2) {
    dv = data$N 
  }
  if(i==3) {
    dv = data$u_p 
  }
  if(i==4) {
    dv = data$goodchall_P_max_NC0
  }
  if(i==5) {
    dv = data$w_pr
  }
  reg = lm(dv ~ data$scand + 
             data$L_VPCT_P_ALL_NC100 +
             data$L_u_p +
             data$L_N +
             data$L_inc +
             data$t_1 +
             data$t_2 +
             data$t_3 +
             data$terms +
             data$dem_rep_oth
  )
  foo[[i]] = reg
}

for (i in 6:7){
  if(i==6) {
    dv = data$VPCT_G_ALL_NC100 
  }
  if(i==7) {
    dv = data$w_g 
  }
  reg = lm(dv ~ data$scand + 
             data$L_VPCT_P_ALL_NC100 +
             data$L_inc +
             data$dem_rep_oth +
             data$ip_1 +
             data$ip_2
  )
  foo[[i]] = reg
}

##### New Replication Information #####

##### Set dependent variable #####
dv = data$w_pr

##### Regression #####
reg = glm(dv ~ data$scand + 
           data$L_VPCT_P_ALL_NC100 +
           data$L_u_p +
           data$L_N +
           data$L_inc +
           data$t_1 +
           data$t_2 +
           data$t_3 +
           data$terms +
           data$dem_rep_oth
#          + data$scand:data$terms            
#          , family=binomial("logit")
)

##### Drop all columns that aren't part of the regression #####
temp = data[c("scand", "L_VPCT_P_ALL_NC100", "L_u_p", "L_N", "L_inc",
              "t_1", "t_2", "t_3",
              "terms", "dem_rep_oth")]

##### Potentially add interaction term if included in regression #####
#temp["inter"] = temp$terms*temp$scand


mat = summary(reg)$coefficients
xtable(mat)

Y_ <- as.matrix(data$scand)
X_ <- cbind(1,as.matrix(temp))
scandals <- Y_ == 1
x__ = X_[scandals,]
test = as.numeric(x__)
test_ = matrix(test, ncol = ncol(X_))
bl <- c(apply(test_, 2, mean, na.rm=T))
bl.table <- matrix(bl)
rownames(bl.table) <- colnames(x__)
colnames(bl.table) <- "Mean"

terms.seq <- seq(1,13,1)
bl_ <- matrix(rep(bl), nrow = length(terms.seq),ncol = length(bl), byrow = TRUE)
bl_[,7] <- terms.seq

##### Old = no scandal, new = scandal #####

bl.old <- bl_
bl.old[,2] <- 0
bl.new <- bl_
bl.new[,2] <- 1

temp1 = mat[,1]
temp2 = vcov(reg)
set.seed(12345)
betas <- mvrnorm(n = 50000, mu = temp1, Sigma = temp2)

exp.holder <- matrix(data = NA, ncol = 6, nrow = length(terms.seq))
for(i in 1:length(terms.seq)){
  old.pr.sim <- bl.old[i,]%*%t(betas)
  exp.holder[i,1] <- mean(old.pr.sim)
  exp.holder[i,2:3] <- quantile(x = old.pr.sim, probs = c(.025,.975))
  new.pr.sim <- bl.new[i,]%*%t(betas)
  exp.holder[i,4] <- mean(new.pr.sim)
  exp.holder[i,5:6] <- quantile(new.pr.sim, c(.025,.975))
}

#pdf(file = "Scandal v Prob Winning.pdf")
plot(terms.seq, 
     exp.holder[,4], 
     type = "l", 
     ylim = c(.8,1), 
     lwd = 2,
     col = "darkblue",
     xlab = "Terms Served",
     ylab = "Prob of Winning Primary",
     cex = .1)
lines(terms.seq, exp.holder[,1], col = "darkorange3", lwd = 2)
lines(terms.seq, exp.holder[,2], lwd = 1.2, col = "darkorange3", lty = 2)
lines(terms.seq, exp.holder[,3], lwd = 1.2, col = "darkorange3", lty = 2)
lines(terms.seq, exp.holder[,5], col = "darkblue", lwd = 1.2, lty = 2)
lines(terms.seq, exp.holder[,6], col = "darkblue", lwd = 1.2, lty = 2)
legend("bottomright", legend = c("Scandal","No Scandal"),
       lwd = c(2,2), col = c("darkblue","darkorange3"),
       cex = .6)
#graphics.off()